In this sub-vignette we present the analysis and source code for figure 7. This sub-vignette can be built along with all other sub-vignettes by running CLLCytokineScreen2021.Rmd.
Load libraries
library(patchwork)
library(maxstat)
library(survminer)
library(survival)
library(ggpubr)
library(rlang)
library(ggbeeswarm)
library(dplyr)
Set plot directory
plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
Raw
#ihc_patient_data: 596 x 7 tibble containing IHC staining intensities forpIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue
load( "../../data/ihc_patient_data.RData")
#ihc_surv_data: 200 x 15 tibble containing IHC staining intensities for pIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue, along with meta data (including clinical outcomes) on each patient sample
load( "../../data/ihc_surv_data.RData")
#ihc_patient_data: 596 x 7 tibble containing IHC staining intensities forpIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue
ihc_patient_data <- read.table(file= "../../inst/extdata/ihc_patient_data.txt",header = TRUE) %>% as_tibble() %>% dplyr::group_by(PatientID)
#ihc_surv_data: 200 x 15 tibble containing IHC staining intensities for pIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue, along with meta data (including clinical outcomes) on each patient sample
ihc_surv_data <- read.table(file= "../../inst/extdata/ihc_surv_data.txt",header = TRUE) %>% as_tibble() %>% dplyr::group_by(PatientID)
Process
#convert stain type to factor
ihc_patient_data$Stain <- factor(ihc_patient_data$Stain, levels = c("pSTAT6", "STAT6" , "pIRAK4"))
source("../../R/themes_colors.R")
Beeswarm-boxplot of mean staining intensities of pSTAT6, for healthy and CLL LN samples
Fig7A <-
ihc_patient_data %>%
#get staining intensities for pSTAT6 only
dplyr::filter(Stain %in% c("pSTAT6")) %>%
#plot staining intensity, stratified by CLL / Healthy LN
ggplot(aes(x = Tissue, y = Intensity)) +
geom_boxplot() +
geom_beeswarm(aes(color=Tissue), alpha=1, cex=2) +
#add p values
stat_compare_means(method = "t.test", comparisons=list(c(1,2)), size=6) +
scale_color_manual(values = c(palreds[8], palblues[2])) +
xlab("") +
ylab("Mean pSTAT6 Staining Intensity") +
guides(color = "none") +
scale_x_discrete(labels = c( "CLL"="CLL-infiltrated \nlymph nodes", "LK"="Non-neoplastic \nlymph nodes")) +
coord_cartesian(clip = "off") +
#add preset theme 2
t2
Fig7A
Beeswarm-boxplot of mean staining intensities of pIRAK4, for healthy and CLL LN samples
Fig7B <-
ihc_patient_data %>%
#filter for pIRAK4 staining intensities only
dplyr::filter(Stain%in%c("pIRAK4")) %>%
#plot staining intensity, stratified by CLL / Healthy LN
ggplot(aes(x=Tissue, y=Intensity)) +
geom_boxplot() +
geom_beeswarm(aes(color=Tissue), alpha=1, cex=2) +
#add p values
stat_compare_means(method = "t.test", comparisons=list(c(1,2)), size=6) +
scale_color_manual(values = c(palreds[8], palblues[2])) +
xlab("") +
ylab("Mean pIRAK4 Staining Intensity") +
guides(color="none") +
scale_x_discrete(labels = c( "CLL"="CLL-infiltrated \nlymph nodes", "LK"="Non-neoplastic \nlymph nodes")) +
coord_cartesian(clip = "off") +
#add preset theme 2
t2
Fig7B
Image of IHC cross-section of CLL-infiltrated LN stained for pSTAT6
Fig7C <-cowplot::ggdraw() +
cowplot::draw_image( "../../inst/images/CLL1_I3_pSTAT6.png", scale = 1)
Fig7C
Image of IHC cross-section of healthy LN stained for pSTAT6
##read image
Fig7D <- cowplot::ggdraw() +
cowplot::draw_image("../../inst/images/LK2_B4_pSTAT6.png", scale = 1)
Fig7D
Image of IHC cross-section of CLL-infiltrated LN stained for pIRAK4
Fig7E <- cowplot::ggdraw() +
cowplot::draw_image( "../../inst/images/CLL1_I3_pIRAK4.png", scale = 1)
Fig7E
Image of IHC cross-section of healthy LN stained for pIRAK4
Fig7F <- cowplot::ggdraw() +
cowplot::draw_image( "../../inst/images/LK2_B4_pIRAK4.png", scale = 1)
Fig7F
Kaplan-Meier curve to show associations of pSTAT6 levels with treatment free survival.
In Figures 7G and 7H the two pSTAT6 and pIRAK4 groups (high /low) were defined by mean staining intensities dichotomised using maximally selected rank statistics. The same 64 CLL lymph node samples were used for both Kaplan-Meier plots. 50 patient samples were in the high pSTAT6 group, and 52 in the high pIRAK4 group.
Prework for 7G and H
#Define stains for which want to visualize TTT
stains <- c("pSTAT6", "pIRAK4")
#Get optimal cut offs
stats <- lapply(stains, function(stn){
survival <- dplyr::select(ihc_surv_data, PatientID, Tissue, Diagnosis, Sex, TTT, treatedAfter, stn)
colnames(survival) <- c("PatientID", "Tissue", "Diagnosis", "Sex", "TTT", "treatedAfter", "target")
#Run test to obtain cut off threshold for high pSTAT6 versus low pSTAT6
maxtest <- maxstat.test(Surv(TTT, treatedAfter)~ target,
data = survival,
smethod = "LogRank",
alpha = NULL)
cutpoint <- maxtest$estimate
})
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(stn)` instead of `stn` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
names(stats) <- c("pSTAT6", "pIRAK4")
#Annotate by cutoff point
survdf <- mutate(ihc_surv_data,
phosphoSTAT6 = ifelse(pSTAT6 < stats$pSTAT6, "low", "high"),
phosphoIRAK4 = ifelse(pIRAK4 < stats$pIRAK4, "low", "high"))
#fit survival models
f_pstat6 <- survfit(Surv(TTT, treatedAfter) ~ phosphoSTAT6, data= survdf)
f_pirak4 <- survfit(Surv(TTT, treatedAfter) ~ phosphoIRAK4, data= survdf)
fits <- list(pSTAT6 = f_pstat6, pIRAK4 = f_pirak4)
#Make plot
gg = ggsurvplot_list(fits,
survdf,
pval=TRUE,
palette=c(palreds[8],palreds[3]),
risk.table = TRUE, legend.title = "",
ggtheme = t2,
legend.labs =list(c("High", "Low"),c("High", "Low")),
xlab="Time in years", ylab="\nTime to next treatment (probability)", title= "", legend = "bottom")
#get plot for pSTAT6 stain
Fig7G <-
wrap_elements(gg$pSTAT6$plot +
theme(axis.title.x = element_blank()) +
gg$pSTAT6$table +
theme(plot.title = element_blank()) +
plot_layout(ncol = 1, heights = c(85, 15)))
Fig7G
Kaplan-Meier curve to show associations of pSTAT6 levels with treatment free survival.
#get plot for pIRAK4 stain
Fig7H <- wrap_elements(gg$pIRAK4$plot+
theme(axis.title.x = element_blank()) +
gg$pIRAK4$table +
theme(plot.title = element_blank()) +
plot_layout(ncol=1, heights = c(85, 15)))
Fig7H
design1 <-"
ACG
ADG
BEH
BFH
"
tp <- theme(plot.tag = element_text(size = 30, vjust = 1, face="plain"))
Fig7 <-
wrap_elements(Fig7A) + tp +
wrap_elements(Fig7B) + tp +
Fig7C + tp +
Fig7D + tp +
Fig7E + tp +
Fig7F + tp +
Fig7G + tp +
Fig7H + tp +
plot_layout(design = design1, widths = c(1,0.7,1), heights =c(0.8, 1, 0.8, 1))+
plot_annotation(tag_levels = "A", title="Figure 7", theme = theme(title=element_text(size = 20)))
Fig7
ggsave(Fig7, device = "jpeg", filename = "../../inst/figs/Fig7.jpeg", width = 16, height = 15, units="in", dpi=300)
ihc_surv_data %>%
ungroup() %>%
mutate(TFT = ifelse(is.na(TFT)|TFT == "NA", "NA", "Available" )) %>%
dplyr::group_by(Diagnosis, TFT) %>%
count()
## # A tibble: 3 × 3
## # Groups: Diagnosis, TFT [3]
## Diagnosis TFT n
## <chr> <chr> <int>
## 1 CLL Available 64
## 2 CLL NA 36
## 3 healthy NA 100
Sys.info()
## sysname
## "Darwin"
## release
## "19.6.0"
## version
## "Darwin Kernel Version 19.6.0: Thu May 6 00:48:39 PDT 2021; root:xnu-6153.141.33~1/RELEASE_X86_64"
## nodename
## "mac-huber20.local"
## machine
## "x86_64"
## login
## "holly"
## user
## "holly"
## effective_user
## "holly"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dplyr_1.0.7 ggbeeswarm_0.6.0 rlang_0.4.11 survival_3.2-12
## [5] survminer_0.4.9 ggpubr_0.4.0 ggplot2_3.3.5 maxstat_0.7-25
## [9] patchwork_1.1.1 BiocStyle_2.20.2
##
## loaded via a namespace (and not attached):
## [1] tidyr_1.1.3 splines_4.1.0 carData_3.0-4
## [4] assertthat_0.2.1 BiocManager_1.30.16 highr_0.9
## [7] vipor_0.4.5 cellranger_1.1.0 yaml_2.2.1
## [10] pillar_1.6.2 backports_1.2.1 lattice_0.20-44
## [13] glue_1.4.2 digest_0.6.27 gridtext_0.1.4
## [16] ggsignif_0.6.2 colorspace_2.0-2 cowplot_1.1.1
## [19] htmltools_0.5.1.1 Matrix_1.3-4 pkgconfig_2.0.3
## [22] broom_0.7.9 haven_2.4.3 magick_2.7.2
## [25] bookdown_0.23 purrr_0.3.4 xtable_1.8-4
## [28] mvtnorm_1.1-2 scales_1.1.1 km.ci_0.5-2
## [31] openxlsx_4.2.4 rio_0.5.27 KMsurv_0.1-5
## [34] tibble_3.1.3 generics_0.1.0 farver_2.1.0
## [37] car_3.0-11 ellipsis_0.3.2 withr_2.4.2
## [40] cli_3.0.1 magrittr_2.0.1 crayon_1.4.1
## [43] readxl_1.3.1 ggtext_0.1.1 evaluate_0.14
## [46] fansi_0.5.0 xml2_1.3.2 rstatix_0.7.0
## [49] forcats_0.5.1 foreign_0.8-81 beeswarm_0.4.0
## [52] tools_4.1.0 data.table_1.14.0 hms_1.1.0
## [55] lifecycle_1.0.0 stringr_1.4.0 munsell_0.5.0
## [58] zip_2.2.0 compiler_4.1.0 grid_4.1.0
## [61] rstudioapi_0.13 labeling_0.4.2 rmarkdown_2.10
## [64] gtable_0.3.0 abind_1.4-5 DBI_1.1.1
## [67] curl_4.3.2 markdown_1.1 R6_2.5.0
## [70] gridExtra_2.3 zoo_1.8-9 knitr_1.33
## [73] survMisc_0.5.5 utf8_1.2.2 exactRankTests_0.8-32
## [76] stringi_1.7.3 Rcpp_1.0.7 vctrs_0.3.8
## [79] tidyselect_1.1.1 xfun_0.25